home *** CD-ROM | disk | FTP | other *** search
- {*********************************************************}
- {* FileRegX *}
- {* Copyright (c) Julian M Bucknall 1997 *}
- {* All rights reserved. *}
- {*********************************************************}
- {* Regular expression routines for filename matching *}
- {*********************************************************}
-
- {Note: this unit is released as freeware. In other words, you are free
- to use this unit in your own applications, however I retain all
- copyright to the code. JMB}
-
- unit FileRegX;
-
- {$IFOPT D+}
- {$DEFINE Debug}
- {$ENDIF}
-
- interface
-
- uses
- SysUtils;
-
- const
- c_frxAnyChar = '?'; {match any character}
- c_frxClosure = '*'; {match zero or more characters}
- c_frxPatClosure = '+'; {match zero or more subpatterns}
- c_frxEscape = '\'; {escape character}
- c_frxClassLeft = '['; {char class left bracket}
- c_frxClassRight = ']'; {char class right bracket}
- c_frxNegate = '^'; {char class negation}
- c_frxClassRange = '-'; {char class range character}
-
- type
- TfrxCompileResult = ( {possible compiler result codes}
- frxcrSuccess, {..success, no errors}
- frxcrNoPattern, {..no pattern to compile}
- frxcrNoSubpattern, {..no subpattern for the + closure}
- frxcrMissingChar, {..no literal char for \ escape char}
- frxcrMissingLeft, {..no left bracket for right one}
- frxcrBadClass); {..badly formed class definition}
-
- type
- PfrxBinPattern = pointer;
-
- function FRXCompilePattern(const aPattern : string;
- var aBinPattern : PfrxBinPattern) : TfrxCompileResult;
- {-compiles a pattern string into a binary pattern; returns result of
- compilation: if successful aBinPattern is the binary pattern,
- otherwise it's set to nil}
-
- procedure FRXFreeBinPattern(var aBinPattern : PfrxBinPattern);
- {-free a binary pattern created by FRXCompilePattern}
-
- function FRXMatchesPattern(aBinPattern : PfrxBinPattern;
- const aFileName : string) : boolean;
- {-given a binary pattern and a file name, returns whether the file
- name matches the pattern}
-
- {$IFDEF Debug}
- procedure FRXPrintBinPattern(var aFile : text;
- const aPattern : string;
- aBinPattern : PfrxBinPattern);
- {-DEBUG only: prints a binary pattern to an open text file}
- {$ENDIF}
-
- implementation
-
- uses
- {$IFDEF Windows}
- WinProcs;
- {$ELSE}
- Windows;
- {$ENDIF}
-
- type
- {$IFDEF Windows}
- TMemSize = word;
- {$ELSE}
- TMemSize = integer;
- {$ENDIF}
-
- type
- TCharSet = set of char;
-
- TTokenType = (c_binAnyChar, {?}
- c_binAnyClosure, {* or ?+}
- c_binLiteral, {<char>}
- c_binLitClosure, {<char>+}
- c_binClass, {[...]}
- c_binClsClosure); {[...]+}
-
- PBinPatNode = ^TBinPatNode;
- TBinPatNode = packed record
- bpnNext : PBinPatNode;
- bpnToken : TTokenType;
- bpnChar : char;
- bpnFiller : word;
- bpnCharClass : TCharSet;
- end;
-
- PBinPatHeader = ^TBinPatHeader;
- TBinPatHeader = packed record
- bphSize : TMemSize;
- bphNext : TMemSize;
- bphData : PByteArray;
- end;
-
- const
- XPNormalSize = sizeof(TBinPatNode) - sizeof(TCharSet);
- XPClassSize = sizeof(TBinPatNode);
-
- const
- c_BinPatBlockDelta = 512;
-
- {===Binary pattern memory routines===================================}
- function NewBinaryPattern : PfrxBinPattern;
- begin
- GetMem(Result, sizeof(TBinPatHeader));
- with PBinPatHeader(Result)^ do begin
- bphSize := c_BinPatBlockDelta;
- bphNext := 0;
- GetMem(bphData, c_BinPatBlockDelta);
- end;
- end;
- {--------}
- function AllocPatternNode(var aBP : PfrxBinPattern;
- aTokenType : TTokenType) : PBinPatNode;
- var
- BPHdr : PBinPatHeader absolute aBP;
- ReqBytes : integer;
- Temp : PBinPatNode;
- Dad : PBinPatNode;
- IsFirst : boolean;
- begin
- {if the binary pattern has not yet been allocated then do so}
- IsFirst := false;
- if (aBP = nil) then begin
- aBP := NewBinaryPattern;
- IsFirst := true;
- end;
-
- {calculate the number of bytes required}
- if (aTokenType = c_binClass) then
- ReqBytes := XPClassSize
- else
- ReqBytes := XPNormalSize;
-
- {do we have enough room? if not realloc our binary pattern}
- with BPHdr^ do begin
- if ((bphSize - bphNext) < ReqBytes) then begin
- {$IFDEF Windows}
- ReallocMem(bphData, bphSize, bphSize + c_BinPatBlockDelta);
- {$ELSE}
- ReallocMem(bphData, bphSize + c_BinPatBlockDelta);
- {$ENDIF}
- inc(bphSize, c_BinPatBlockDelta);
- end;
- end;
- {allocate the next node, set its fields}
- with BPHdr^ do begin
- Result := PBinPatNode(@bphData^[bphNext]);
- inc(bphNext, ReqBytes);
- end;
- FillChar(Result^, ReqBytes, 0);
- Result^.bpnToken := aTokenType;
-
- {if it wasn't the first node, make sure it's linked to the others}
- if not IsFirst then begin
- Temp := PBinPatNode(BPHdr^.bphData);
- repeat
- Dad := Temp;
- Temp := Temp^.bpnNext;
- until (Temp = nil);
- Dad^.bpnNext := Result;
- end;
- end;
- {====================================================================}
-
- {===Helper routines==================================================}
- function LowerCaseChar(aCh : char) : char;
- {Convert a character to lowercase using language driver}
- begin
- {$IFDEF Windows}
- Result := char(AnsiLower(pointer(longint(aCh))));
- {$ELSE}
- Result := char(CharLower(pointer(longint(aCh))));
- {$ENDIF}
- end;
- {--------}
- procedure NegateSet(var S : TCharSet);
- {Negate a character set}
- var
- BA : TByteArray absolute S;
- i : integer;
- begin
- for i := 0 to pred(sizeof(S)) do
- BA[i] := not BA[i];
- end;
- {--------}
- function CloseLastPatternToken(aBinPattern : PfrxBinPattern) : boolean;
- {Given a binary pattern, attempts to close the last node, returns true
- if successful}
- var
- Temp : PBinPatNode;
- Dad : PBinPatNode;
- begin
- {can't be successful if there is no binary pattern}
- if (aBinPattern = nil) then begin
- Result := false;
- Exit;
- end;
- {find the last node}
- Temp := PBinPatNode(PBinPatHeader(aBinPattern)^.bphData);
- repeat
- Dad := Temp;
- Temp := Temp^.bpnNext;
- until (Temp = nil);
- {close it}
- Result := true;
- case Dad^.bpnToken of
- c_binAnyChar : Dad^.bpnToken := c_binAnyClosure;
- c_binLiteral : Dad^.bpnToken := c_binLitClosure;
- c_binClass : Dad^.bpnToken := c_binClsClosure;
- else
- {oops, already closed}
- Result := false;
- end;{case}
- end;
- {--------}
- function MatchOneChar(aToken : PBinPatNode;
- aCh : char) : boolean;
- {Given a pattern token and a character, returns true if the char
- matches the token}
- begin
- case aToken^.bpnToken of
- c_binAnyChar,
- c_binAnyClosure : Result := true;
- c_binLiteral,
- c_binLitClosure : Result := aToken^.bpnChar = LowerCaseChar(aCh);
- c_binClass,
- c_binClsClosure : Result := LowerCaseChar(aCh) in aToken^.bpnCharClass;
- else
- Result := false;
- end;
- end;
- {--------}
- function ParseCharClass(const aPattern : string;
- aPatLen : integer;
- var aInx : integer;
- aToken : PBinPatNode) : TfrxCompileResult;
- {Parses a character class definition from a pattern string into a
- pattern node; returns error code if any error encountered}
- type
- TRangeState = (CouldStart, Started, Completed);
- var
- FirstInx : integer;
- FirstChar: char;
- Ch : char;
- ChInx : char;
- NegatedClass : boolean;
- FoundRightBracket : boolean;
- RangeState : TRangeState;
- begin
- {Input: aPattern is the pattern string
- aPatLen is its length
- aInx is the position of the left bracket
- aToken is the new token to hold the class definition
- Output: Result is the error code
- aInx is the position of the right bracket if successful
- aToken has the class definition}
-
- {assume we fail}
- Result := frxcrBadClass;
-
- {assume that the class is not negated, and we shall not find the
- right bracket, and that ranges are complete}
- NegatedClass := false;
- FoundRightBracket := false;
- RangeState := Completed;
-
- {fool compiler hints/warnings}
- FirstChar := #0;
-
- {wander through the pattern string character by character}
- FirstInx := succ(aInx);
- while (aInx < aPatLen) do begin
- inc(aInx);
-
- {look for a char class metacharacter}
- Ch := LowerCaseChar(aPattern[aInx]);
- case Ch of
-
- c_frxEscape :
- begin
- {the escape character cannot be the last character}
- if (aInx = aPatLen) then
- Exit;
- inc(aInx);
- Ch := LowerCaseChar(aPattern[aInx]);
- {it's now a literal character; there are two cases, it's the
- end of a range or it isn't}
- if (RangeState = Started) then begin
- if (Ch <= FirstChar) then
- Exit;
- for ChInx := succ(FirstChar) to Ch do
- Include(aToken^.bpnCharClass, LowerCaseChar(ChInx));
- RangeState := Completed;
- end
- else begin
- Include(aToken^.bpnCharClass, Ch);
- FirstChar := Ch;
- RangeState := CouldStart;
- end;
- end;
-
- c_frxNegate :
- begin
- {the class negation can only be the first character}
- if (aInx <> FirstInx) then
- Exit;
- {make a note that we have a negated class}
- NegatedClass := true;
- {advance the first character, in effect ignoring the
- negation metacharacter}
- inc(FirstInx);
- end;
-
- c_frxClassRight :
- begin
- {the right bracket cannot be the first character}
- if (aInx = FirstInx) then
- Exit;
- {make a note that we found the right bracket and break out
- of the loop}
- FoundRightBracket := true;
- Break;
- end;
-
- c_frxClassRange :
- begin
- {if this is the first character in the class then it's a
- literal character}
- if (aInx = FirstInx) then
- Include(aToken^.bpnCharClass, c_frxClassRange)
- {otherwise it's a range character, so we must be able to
- start a range}
- else if (RangeState <> CouldStart) then
- Exit
- {make a note that we're in a range}
- else
- RangeState := Started;
- end;
- else
- {it's a literal character; there are two cases, it's the end of
- a range or it isn't}
- if (RangeState = Started) then begin
- if (Ch <= FirstChar) then
- Exit;
- for ChInx := succ(FirstChar) to Ch do
- Include(aToken^.bpnCharClass, LowerCaseChar(ChInx));
- RangeState := Completed;
- end
- else begin
- Include(aToken^.bpnCharClass, Ch);
- FirstChar := Ch;
- RangeState := CouldStart;
- end;
- end;{case}
- end;
-
- {do a final check on everything being OK (ie, we found a right
- bracket and we're not in the middle of parsing a range)}
- if (not FoundRightBracket) or (RangeState = Started) then
- Exit;
-
- {if the class is negated then negate the definition}
- if NegatedClass then
- NegateSet(aToken^.bpnCharClass);
-
- {all's well}
- Result := frxcrSuccess;
- end;
- {====================================================================}
-
-
- {===Interfaced routines==============================================}
- procedure FRXFreeBinPattern(var aBinPattern : PfrxBinPattern);
- begin
- if (aBinPattern <> nil) then begin
- with PBinPatHeader(aBinPattern)^ do
- FreeMem(bphData, bphSize);
- FreeMem(aBinPattern, sizeof(TBinPatHeader));
- aBinPattern := nil;
- end;
- end;
- {--------}
- function FRXCompilePattern(const aPattern : string;
- var aBinPattern : PfrxBinPattern) : TfrxCompileResult;
- var
- Ch : char;
- Inx : integer;
- PatLen : integer;
- Token : PBinPatNode;
- begin
- {assume success}
- Result := frxcrSuccess;
-
- {start the binary pattern off}
- aBinPattern := nil;
-
- {an empty pattern string is invalid}
- PatLen := length(aPattern);
- if (PatLen = 0) then begin
- Result := frxcrNoPattern;
- Exit;
- end;
-
- {wander through the pattern string character by character}
- Inx := 0;
- while (Inx < PatLen) do begin
- inc(Inx);
-
- {look for a metacharacter}
- Ch := LowerCaseChar(aPattern[Inx]);
- case Ch of
-
- {for ? and * just create a new pattern token}
- c_frxAnyChar :
- begin
- AllocPatternNode(aBinPattern, c_binAnyChar);
- end;
- c_frxClosure :
- begin
- AllocPatternNode(aBinPattern, c_binAnyClosure);
- end;
-
- {with the subpattern closure, there must be a prior subpattern
- that's not already closed}
- c_frxPatClosure :
- begin
- if not CloseLastPatternToken(aBinPattern) then begin
- FRXFreeBinPattern(aBinPattern);
- Result := frxcrNoSubpattern;
- Exit;
- end;
- end;
-
- {the escape character cannot appear at the end of the pattern}
- c_frxEscape :
- begin
- if (Inx = PatLen) then begin
- FRXFreeBinPattern(aBinPattern);
- Result := frxcrMissingChar;
- Exit;
- end;
- Token := AllocPatternNode(aBinPattern, c_binLiteral);
- inc(Inx);
- Token^.bpnChar := LowerCaseChar(aPattern[Inx]);
- end;
-
- {fun one: the left bracket at the start of a character class}
- c_frxClassLeft :
- begin
- {it can't appear at the end of the pattern}
- if (Inx = PatLen) then begin
- FRXFreeBinPattern(aBinPattern);
- Result := frxcrBadClass;
- Exit;
- end;
- {create a new token as if everything was OK}
- Token := AllocPatternNode(aBinPattern, c_binClass);
- {parse the character class}
- Result := ParseCharClass(aPattern, PatLen, Inx, Token);
- if (Result <> frxcrSuccess) then begin
- FRXFreeBinPattern(aBinPattern);
- Exit;
- end;
- end;
-
- {the right bracket cannot appear without a left one}
- c_frxClassRight :
- begin
- FRXFreeBinPattern(aBinPattern);
- Result := frxcrMissingLeft;
- Exit;
- end;
-
- else
- {any other character is a literal}
- Token := AllocPatternNode(aBinPattern, c_binLiteral);
- Token^.bpnChar := Ch;
- end;{case}
- end;
- end;
- {--------}
- function FRXMatchesPattern(aBinPattern : PfrxBinPattern;
- const aFileName : string) : boolean;
- type
- TCheckPoint = packed record
- cpToken : PBinPatNode;
- cpStart : word;
- cpInx : word;
- end;
- var
- FNLen : integer;
- Inx : integer;
- StartInx: integer;
- Token : PBinPatNode;
- BadSimpleMatch : boolean;
- TokenSP : integer;
- TokenStack : array [0..127] of TCheckPoint;
- begin
- {assume that we'll fail}
- Result := false;
-
- {if the pattern is empty, there's no match}
- if (aBinPattern = nil) then
- Exit;
-
- {if the filename is the empty string, there's no match}
- FNLen := length(aFileName);
- if (FNLen = 0) then
- Exit;
-
- {prepare closure token stack to be empty}
- TokenSP := -1;
-
- {prepare for loop}
- Token := PBinPatNode(PBinPatHeader(aBinPattern)^.bphData);
- Inx := 1;
- while True do begin
- BadSimpleMatch := false;
- case Token^.bpnToken of
- c_binAnyClosure :
- begin
- {push it onto the stack as a greedy token}
- inc(TokenSP);
- with TokenStack[TokenSP] do begin
- cpToken := Token;
- cpStart := Inx;
- cpInx := succ(FNLen);
- end;
- {indicate we've matched everything}
- Inx := succ(FNLen);
- {advance the token}
- Token := Token^.bpnNext;
- end;
- c_binLitClosure,
- c_binClsClosure :
- begin
- {match as many chars as we can}
- StartInx := Inx;
- while (Inx <= FNLen) and
- MatchOneChar(Token, aFileName[Inx]) do
- inc(Inx);
- {if we matched at least one char...}
- if (StartInx < Inx) then begin
- {push it onto the stack as a greedy token}
- inc(TokenSP);
- with TokenStack[TokenSP] do begin
- cpToken := Token;
- cpStart := StartInx;
- cpInx := Inx;
- end;
- end;
- {advance the token}
- Token := Token^.bpnNext;
- end;
- else {the current token is a simple token}
- {if there is a current character and it matches the current
- token, advance}
- if (Inx <= FNLen) and
- MatchOneChar(Token, aFileName[Inx]) then begin
- Token := Token^.bpnNext;
- inc(Inx);
- end
- {otherwise there is no current character or it did not match}
- else begin
- {if there is no closure to revert to, we're done but failed}
- if (TokenSP = -1) then
- Exit;
- {make a note we failed to match: this'll trigger an operation
- at the end of the loop to revert to a previous closure}
- BadSimpleMatch := true;
- end;
- end;{case}
-
- {we're finished and successful if the current token is nil (ie, we
- ran out of tokens) and the current character index is greater
- than the length of the string (ie, we ran out of string)}
- if (Token = nil) and (Inx > FNLen) then begin
- Result := true;
- Exit;
- end;
-
- {if the current token is nil or there was a bad simple match, we
- need to revert to a previous closure and back up one character}
- if (Token = nil) or BadSimpleMatch then begin
- while (TokenSP <> -1) do begin
- with TokenStack[TokenSP] do begin
- if (cpInx > cpStart) then begin
- dec(cpInx);
- Token := cpToken^.bpnNext;
- Inx := cpInx;
- Break;{out of while loop}
- end;
- dec(TokenSP);
- end;
- end;
- {if there are no more closures or the current token is still
- nil, we're done but failed}
- if (TokenSP = -1) or (Token = nil) then
- Exit;
- end;
- end;{of forever loop}
- end;
- {--------}
- {$IFDEF Debug}
- procedure FRXPrintBinPattern(var aFile : text;
- const aPattern : string;
- aBinPattern : PfrxBinPattern);
- var
- Ch : char;
- Temp : PBinPatNode;
- begin
- writeln(aFile, 'Binary pattern print of "', aPattern, '"');
- Temp := PBinPatNode(PBinPatHeader(aBinPattern)^.bphData);
- while (Temp <> nil) do begin
- case Temp^.bpnToken of
- c_binAnyChar : writeln(aFile, '<any char> ');
- c_binAnyClosure : writeln(aFile, '<closure> ');
- c_binLiteral : writeln(aFile, '<literal> [', Temp^.bpnChar, ']');
- c_binLitClosure : writeln(aFile, '<lit.closure> [', Temp^.bpnChar, ']');
- c_binClass,
- c_binClsClosure : begin
- if (Temp^.bpnToken = c_binClass) then
- write(aFile, '<char class> ')
- else
- write(aFile, '<classclosure>');
- for Ch := #0 to #63 do begin
- if Ch in Temp^.bpnCharClass then
- write(aFile, '+')
- else
- write(aFile, '.');
- end;
- for Ch := #64 to #255 do begin
- if ((ord(Ch) mod 64) = 0) then begin
- writeln(aFile);
- write(aFile, ' ');
- end;
- if Ch in Temp^.bpnCharClass then
- write(aFile, '+')
- else
- write(aFile, '.');
- end;
- writeln(aFile);
- end;
- else
- writeln(aFile, '***unknown token***');
- end;{case}
- Temp := Temp^.bpnNext;
- end;
- writeln(aFile, '---');
- end;
- {$ENDIF}
- {====================================================================}
-
- end.
-